{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "866ac389",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "c:\\ProgramData\\Anaconda3\\lib\\site-packages\\scipy\\__init__.py:155: UserWarning: A NumPy version >=1.18.5 and <1.25.0 is required for this version of SciPy (detected version 1.26.0\n",
      "  warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "# import igraph as ig\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "import math\n",
    "import time\n",
    "import datetime,time\n",
    "import os\n",
    "import pickle\n",
    "from zhx_config import zhx_config\n",
    "pd.set_option('display.max_info_columns', 500)\n",
    "pd.set_option('display.max_columns', 1000)\n",
    "pd.set_option('display.max_row', 300)\n",
    "pd.set_option('display.float_format', lambda x: ' %.5f' % x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "1f272771",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'初始数据dir': './data/1_初始数据',\n",
       " '预处理后dir': './data/2_预处理后',\n",
       " '特征_dir': './data/3_特征'}"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "zhx_config"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "0ad02f9b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['初始数据.csv']"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 展开数据集列表\n",
    "os.listdir(zhx_config['初始数据dir'] + '/train')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "c74ba633",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['初始数据.csv']"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "os.listdir(zhx_config['初始数据dir'] + '/test')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "0369cb21",
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_all_csv(path):\n",
    "    return read_all_csv_(path)\n",
    "    \n",
    "def read_all_csv_(path):\n",
    "    data_dict = {}\n",
    "    # print(\"根目录\"+os.getcwd())\n",
    "    for root, ds, fs in os.walk(path):\n",
    "        # print(root)\n",
    "        for f in fs:\n",
    "            if f.endswith('.csv'):\n",
    "                # print(f)\n",
    "                fullname = os.path.join(root, f)\n",
    "                # 去除文件名后缀\n",
    "                data_name = os.path.splitext(f)[0]\n",
    "                data_dict[data_name] = pd.read_csv(fullname)\n",
    "                print(\"读取: \"+fullname)\n",
    "                print(data_dict[data_name].shape)\n",
    "                # \n",
    "    if len(data_dict) == 0:\n",
    "        raise ValueError(\"路径有误, csv文件不存在\")\n",
    "    else:\n",
    "        print(\"文件列表如下:\")\n",
    "        print(data_dict.keys())\n",
    "    return data_dict   "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "1e5fedf4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "读取: ./data/1_初始数据/train\\初始数据.csv\n",
      "(9557, 143)\n",
      "文件列表如下:\n",
      "dict_keys(['初始数据'])\n",
      "------------------------------------------------\n",
      "读取: ./data/1_初始数据/test\\初始数据.csv\n",
      "(23856, 142)\n",
      "文件列表如下:\n",
      "dict_keys(['初始数据'])\n"
     ]
    }
   ],
   "source": [
    "# 读取数据集\n",
    "df_train = read_all_csv(zhx_config['初始数据dir'] + '/train')\n",
    "print(\"------------------------------------------------\")\n",
    "df_test = read_all_csv(zhx_config['初始数据dir'] + '/test')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "a59046af",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Id</th>\n",
       "      <th>v2a1</th>\n",
       "      <th>hacdor</th>\n",
       "      <th>rooms</th>\n",
       "      <th>hacapo</th>\n",
       "      <th>v14a</th>\n",
       "      <th>refrig</th>\n",
       "      <th>v18q</th>\n",
       "      <th>v18q1</th>\n",
       "      <th>r4h1</th>\n",
       "      <th>r4h2</th>\n",
       "      <th>r4h3</th>\n",
       "      <th>r4m1</th>\n",
       "      <th>r4m2</th>\n",
       "      <th>r4m3</th>\n",
       "      <th>r4t1</th>\n",
       "      <th>r4t2</th>\n",
       "      <th>r4t3</th>\n",
       "      <th>tamhog</th>\n",
       "      <th>tamviv</th>\n",
       "      <th>escolari</th>\n",
       "      <th>rez_esc</th>\n",
       "      <th>hhsize</th>\n",
       "      <th>paredblolad</th>\n",
       "      <th>paredzocalo</th>\n",
       "      <th>paredpreb</th>\n",
       "      <th>pareddes</th>\n",
       "      <th>paredmad</th>\n",
       "      <th>paredzinc</th>\n",
       "      <th>paredfibras</th>\n",
       "      <th>paredother</th>\n",
       "      <th>pisomoscer</th>\n",
       "      <th>pisocemento</th>\n",
       "      <th>pisoother</th>\n",
       "      <th>pisonatur</th>\n",
       "      <th>pisonotiene</th>\n",
       "      <th>pisomadera</th>\n",
       "      <th>techozinc</th>\n",
       "      <th>techoentrepiso</th>\n",
       "      <th>techocane</th>\n",
       "      <th>techootro</th>\n",
       "      <th>cielorazo</th>\n",
       "      <th>abastaguadentro</th>\n",
       "      <th>abastaguafuera</th>\n",
       "      <th>abastaguano</th>\n",
       "      <th>public</th>\n",
       "      <th>planpri</th>\n",
       "      <th>noelec</th>\n",
       "      <th>coopele</th>\n",
       "      <th>sanitario1</th>\n",
       "      <th>sanitario2</th>\n",
       "      <th>sanitario3</th>\n",
       "      <th>sanitario5</th>\n",
       "      <th>sanitario6</th>\n",
       "      <th>energcocinar1</th>\n",
       "      <th>energcocinar2</th>\n",
       "      <th>energcocinar3</th>\n",
       "      <th>energcocinar4</th>\n",
       "      <th>elimbasu1</th>\n",
       "      <th>elimbasu2</th>\n",
       "      <th>elimbasu3</th>\n",
       "      <th>elimbasu4</th>\n",
       "      <th>elimbasu5</th>\n",
       "      <th>elimbasu6</th>\n",
       "      <th>epared1</th>\n",
       "      <th>epared2</th>\n",
       "      <th>epared3</th>\n",
       "      <th>etecho1</th>\n",
       "      <th>etecho2</th>\n",
       "      <th>etecho3</th>\n",
       "      <th>eviv1</th>\n",
       "      <th>eviv2</th>\n",
       "      <th>eviv3</th>\n",
       "      <th>dis</th>\n",
       "      <th>male</th>\n",
       "      <th>female</th>\n",
       "      <th>estadocivil1</th>\n",
       "      <th>estadocivil2</th>\n",
       "      <th>estadocivil3</th>\n",
       "      <th>estadocivil4</th>\n",
       "      <th>estadocivil5</th>\n",
       "      <th>estadocivil6</th>\n",
       "      <th>estadocivil7</th>\n",
       "      <th>parentesco1</th>\n",
       "      <th>parentesco2</th>\n",
       "      <th>parentesco3</th>\n",
       "      <th>parentesco4</th>\n",
       "      <th>parentesco5</th>\n",
       "      <th>parentesco6</th>\n",
       "      <th>parentesco7</th>\n",
       "      <th>parentesco8</th>\n",
       "      <th>parentesco9</th>\n",
       "      <th>parentesco10</th>\n",
       "      <th>parentesco11</th>\n",
       "      <th>parentesco12</th>\n",
       "      <th>idhogar</th>\n",
       "      <th>hogar_nin</th>\n",
       "      <th>hogar_adul</th>\n",
       "      <th>hogar_mayor</th>\n",
       "      <th>hogar_total</th>\n",
       "      <th>dependency</th>\n",
       "      <th>edjefe</th>\n",
       "      <th>edjefa</th>\n",
       "      <th>meaneduc</th>\n",
       "      <th>instlevel1</th>\n",
       "      <th>instlevel2</th>\n",
       "      <th>instlevel3</th>\n",
       "      <th>instlevel4</th>\n",
       "      <th>instlevel5</th>\n",
       "      <th>instlevel6</th>\n",
       "      <th>instlevel7</th>\n",
       "      <th>instlevel8</th>\n",
       "      <th>instlevel9</th>\n",
       "      <th>bedrooms</th>\n",
       "      <th>overcrowding</th>\n",
       "      <th>tipovivi1</th>\n",
       "      <th>tipovivi2</th>\n",
       "      <th>tipovivi3</th>\n",
       "      <th>tipovivi4</th>\n",
       "      <th>tipovivi5</th>\n",
       "      <th>computer</th>\n",
       "      <th>television</th>\n",
       "      <th>mobilephone</th>\n",
       "      <th>qmobilephone</th>\n",
       "      <th>lugar1</th>\n",
       "      <th>lugar2</th>\n",
       "      <th>lugar3</th>\n",
       "      <th>lugar4</th>\n",
       "      <th>lugar5</th>\n",
       "      <th>lugar6</th>\n",
       "      <th>area1</th>\n",
       "      <th>area2</th>\n",
       "      <th>age</th>\n",
       "      <th>SQBescolari</th>\n",
       "      <th>SQBage</th>\n",
       "      <th>SQBhogar_total</th>\n",
       "      <th>SQBedjefe</th>\n",
       "      <th>SQBhogar_nin</th>\n",
       "      <th>SQBovercrowding</th>\n",
       "      <th>SQBdependency</th>\n",
       "      <th>SQBmeaned</th>\n",
       "      <th>agesq</th>\n",
       "      <th>Target</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>ID_279628684</td>\n",
       "      <td>190000.00000</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>10</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>21eb7fcc1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>no</td>\n",
       "      <td>10</td>\n",
       "      <td>no</td>\n",
       "      <td>10.00000</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1.00000</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>43</td>\n",
       "      <td>100</td>\n",
       "      <td>1849</td>\n",
       "      <td>1</td>\n",
       "      <td>100</td>\n",
       "      <td>0</td>\n",
       "      <td>1.00000</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>100.00000</td>\n",
       "      <td>1849</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>ID_f29eb3ddd</td>\n",
       "      <td>135000.00000</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1.00000</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>12</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0e5d7a658</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>8</td>\n",
       "      <td>12</td>\n",
       "      <td>no</td>\n",
       "      <td>12.00000</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1.00000</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>67</td>\n",
       "      <td>144</td>\n",
       "      <td>4489</td>\n",
       "      <td>1</td>\n",
       "      <td>144</td>\n",
       "      <td>0</td>\n",
       "      <td>1.00000</td>\n",
       "      <td>64.00000</td>\n",
       "      <td>144.00000</td>\n",
       "      <td>4489</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>ID_68de51c94</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>11</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2c7317ea8</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>8</td>\n",
       "      <td>no</td>\n",
       "      <td>11</td>\n",
       "      <td>11.00000</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0.50000</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>92</td>\n",
       "      <td>121</td>\n",
       "      <td>8464</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.25000</td>\n",
       "      <td>64.00000</td>\n",
       "      <td>121.00000</td>\n",
       "      <td>8464</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>ID_d671db89c</td>\n",
       "      <td>180000.00000</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1.00000</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>9</td>\n",
       "      <td>1.00000</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2b58d945f</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>yes</td>\n",
       "      <td>11</td>\n",
       "      <td>no</td>\n",
       "      <td>11.00000</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>1.33333</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>17</td>\n",
       "      <td>81</td>\n",
       "      <td>289</td>\n",
       "      <td>16</td>\n",
       "      <td>121</td>\n",
       "      <td>4</td>\n",
       "      <td>1.77778</td>\n",
       "      <td>1.00000</td>\n",
       "      <td>121.00000</td>\n",
       "      <td>289</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>ID_d56d6f5f5</td>\n",
       "      <td>180000.00000</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1.00000</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>11</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2b58d945f</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>yes</td>\n",
       "      <td>11</td>\n",
       "      <td>no</td>\n",
       "      <td>11.00000</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>1.33333</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>37</td>\n",
       "      <td>121</td>\n",
       "      <td>1369</td>\n",
       "      <td>16</td>\n",
       "      <td>121</td>\n",
       "      <td>4</td>\n",
       "      <td>1.77778</td>\n",
       "      <td>1.00000</td>\n",
       "      <td>121.00000</td>\n",
       "      <td>1369</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9552</th>\n",
       "      <td>ID_d45ae367d</td>\n",
       "      <td>80000.00000</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>d6c086aa3</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>.25</td>\n",
       "      <td>9</td>\n",
       "      <td>no</td>\n",
       "      <td>8.25000</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>1.25000</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>46</td>\n",
       "      <td>81</td>\n",
       "      <td>2116</td>\n",
       "      <td>25</td>\n",
       "      <td>81</td>\n",
       "      <td>1</td>\n",
       "      <td>1.56250</td>\n",
       "      <td>0.06250</td>\n",
       "      <td>68.06250</td>\n",
       "      <td>2116</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9553</th>\n",
       "      <td>ID_c94744e07</td>\n",
       "      <td>80000.00000</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>d6c086aa3</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>.25</td>\n",
       "      <td>9</td>\n",
       "      <td>no</td>\n",
       "      <td>8.25000</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>1.25000</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>25</td>\n",
       "      <td>81</td>\n",
       "      <td>1</td>\n",
       "      <td>1.56250</td>\n",
       "      <td>0.06250</td>\n",
       "      <td>68.06250</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9554</th>\n",
       "      <td>ID_85fc658f8</td>\n",
       "      <td>80000.00000</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>d6c086aa3</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>.25</td>\n",
       "      <td>9</td>\n",
       "      <td>no</td>\n",
       "      <td>8.25000</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>1.25000</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>50</td>\n",
       "      <td>25</td>\n",
       "      <td>2500</td>\n",
       "      <td>25</td>\n",
       "      <td>81</td>\n",
       "      <td>1</td>\n",
       "      <td>1.56250</td>\n",
       "      <td>0.06250</td>\n",
       "      <td>68.06250</td>\n",
       "      <td>2500</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9555</th>\n",
       "      <td>ID_ced540c61</td>\n",
       "      <td>80000.00000</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>11</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>d6c086aa3</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>.25</td>\n",
       "      <td>9</td>\n",
       "      <td>no</td>\n",
       "      <td>8.25000</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>1.25000</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>26</td>\n",
       "      <td>121</td>\n",
       "      <td>676</td>\n",
       "      <td>25</td>\n",
       "      <td>81</td>\n",
       "      <td>1</td>\n",
       "      <td>1.56250</td>\n",
       "      <td>0.06250</td>\n",
       "      <td>68.06250</td>\n",
       "      <td>676</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9556</th>\n",
       "      <td>ID_a38c64491</td>\n",
       "      <td>80000.00000</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>d6c086aa3</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>.25</td>\n",
       "      <td>9</td>\n",
       "      <td>no</td>\n",
       "      <td>8.25000</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>1.25000</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>21</td>\n",
       "      <td>64</td>\n",
       "      <td>441</td>\n",
       "      <td>25</td>\n",
       "      <td>81</td>\n",
       "      <td>1</td>\n",
       "      <td>1.56250</td>\n",
       "      <td>0.06250</td>\n",
       "      <td>68.06250</td>\n",
       "      <td>441</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>9557 rows × 143 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                Id          v2a1  hacdor  rooms  hacapo  v14a  refrig  v18q  \\\n",
       "0     ID_279628684  190000.00000       0      3       0     1       1     0   \n",
       "1     ID_f29eb3ddd  135000.00000       0      4       0     1       1     1   \n",
       "2     ID_68de51c94           NaN       0      8       0     1       1     0   \n",
       "3     ID_d671db89c  180000.00000       0      5       0     1       1     1   \n",
       "4     ID_d56d6f5f5  180000.00000       0      5       0     1       1     1   \n",
       "...            ...           ...     ...    ...     ...   ...     ...   ...   \n",
       "9552  ID_d45ae367d   80000.00000       0      6       0     1       1     0   \n",
       "9553  ID_c94744e07   80000.00000       0      6       0     1       1     0   \n",
       "9554  ID_85fc658f8   80000.00000       0      6       0     1       1     0   \n",
       "9555  ID_ced540c61   80000.00000       0      6       0     1       1     0   \n",
       "9556  ID_a38c64491   80000.00000       0      6       0     1       1     0   \n",
       "\n",
       "        v18q1  r4h1  r4h2  r4h3  r4m1  r4m2  r4m3  r4t1  r4t2  r4t3  tamhog  \\\n",
       "0         NaN     0     1     1     0     0     0     0     1     1       1   \n",
       "1     1.00000     0     1     1     0     0     0     0     1     1       1   \n",
       "2         NaN     0     0     0     0     1     1     0     1     1       1   \n",
       "3     1.00000     0     2     2     1     1     2     1     3     4       4   \n",
       "4     1.00000     0     2     2     1     1     2     1     3     4       4   \n",
       "...       ...   ...   ...   ...   ...   ...   ...   ...   ...   ...     ...   \n",
       "9552      NaN     0     2     2     1     2     3     1     4     5       5   \n",
       "9553      NaN     0     2     2     1     2     3     1     4     5       5   \n",
       "9554      NaN     0     2     2     1     2     3     1     4     5       5   \n",
       "9555      NaN     0     2     2     1     2     3     1     4     5       5   \n",
       "9556      NaN     0     2     2     1     2     3     1     4     5       5   \n",
       "\n",
       "      tamviv  escolari  rez_esc  hhsize  paredblolad  paredzocalo  paredpreb  \\\n",
       "0          1        10      NaN       1            1            0          0   \n",
       "1          1        12      NaN       1            0            0          0   \n",
       "2          1        11      NaN       1            0            0          0   \n",
       "3          4         9  1.00000       4            1            0          0   \n",
       "4          4        11      NaN       4            1            0          0   \n",
       "...      ...       ...      ...     ...          ...          ...        ...   \n",
       "9552       5         9      NaN       5            1            0          0   \n",
       "9553       5         0      NaN       5            1            0          0   \n",
       "9554       5         5      NaN       5            1            0          0   \n",
       "9555       5        11      NaN       5            1            0          0   \n",
       "9556       5         8      NaN       5            1            0          0   \n",
       "\n",
       "      pareddes  paredmad  paredzinc  paredfibras  paredother  pisomoscer  \\\n",
       "0            0         0          0            0           0           1   \n",
       "1            0         1          0            0           0           0   \n",
       "2            0         1          0            0           0           1   \n",
       "3            0         0          0            0           0           1   \n",
       "4            0         0          0            0           0           1   \n",
       "...        ...       ...        ...          ...         ...         ...   \n",
       "9552         0         0          0            0           0           0   \n",
       "9553         0         0          0            0           0           0   \n",
       "9554         0         0          0            0           0           0   \n",
       "9555         0         0          0            0           0           0   \n",
       "9556         0         0          0            0           0           0   \n",
       "\n",
       "      pisocemento  pisoother  pisonatur  pisonotiene  pisomadera  techozinc  \\\n",
       "0               0          0          0            0           0          0   \n",
       "1               0          0          0            0           1          1   \n",
       "2               0          0          0            0           0          1   \n",
       "3               0          0          0            0           0          1   \n",
       "4               0          0          0            0           0          1   \n",
       "...           ...        ...        ...          ...         ...        ...   \n",
       "9552            1          0          0            0           0          1   \n",
       "9553            1          0          0            0           0          1   \n",
       "9554            1          0          0            0           0          1   \n",
       "9555            1          0          0            0           0          1   \n",
       "9556            1          0          0            0           0          1   \n",
       "\n",
       "      techoentrepiso  techocane  techootro  cielorazo  abastaguadentro  \\\n",
       "0                  1          0          0          1                1   \n",
       "1                  0          0          0          1                1   \n",
       "2                  0          0          0          1                1   \n",
       "3                  0          0          0          1                1   \n",
       "4                  0          0          0          1                1   \n",
       "...              ...        ...        ...        ...              ...   \n",
       "9552               0          0          0          1                1   \n",
       "9553               0          0          0          1                1   \n",
       "9554               0          0          0          1                1   \n",
       "9555               0          0          0          1                1   \n",
       "9556               0          0          0          1                1   \n",
       "\n",
       "      abastaguafuera  abastaguano  public  planpri  noelec  coopele  \\\n",
       "0                  0            0       1        0       0        0   \n",
       "1                  0            0       1        0       0        0   \n",
       "2                  0            0       1        0       0        0   \n",
       "3                  0            0       1        0       0        0   \n",
       "4                  0            0       1        0       0        0   \n",
       "...              ...          ...     ...      ...     ...      ...   \n",
       "9552               0            0       0        0       0        1   \n",
       "9553               0            0       0        0       0        1   \n",
       "9554               0            0       0        0       0        1   \n",
       "9555               0            0       0        0       0        1   \n",
       "9556               0            0       0        0       0        1   \n",
       "\n",
       "      sanitario1  sanitario2  sanitario3  sanitario5  sanitario6  \\\n",
       "0              0           1           0           0           0   \n",
       "1              0           1           0           0           0   \n",
       "2              0           1           0           0           0   \n",
       "3              0           1           0           0           0   \n",
       "4              0           1           0           0           0   \n",
       "...          ...         ...         ...         ...         ...   \n",
       "9552           0           0           1           0           0   \n",
       "9553           0           0           1           0           0   \n",
       "9554           0           0           1           0           0   \n",
       "9555           0           0           1           0           0   \n",
       "9556           0           0           1           0           0   \n",
       "\n",
       "      energcocinar1  energcocinar2  energcocinar3  energcocinar4  elimbasu1  \\\n",
       "0                 0              0              1              0          1   \n",
       "1                 0              1              0              0          1   \n",
       "2                 0              1              0              0          1   \n",
       "3                 0              1              0              0          1   \n",
       "4                 0              1              0              0          1   \n",
       "...             ...            ...            ...            ...        ...   \n",
       "9552              0              0              1              0          0   \n",
       "9553              0              0              1              0          0   \n",
       "9554              0              0              1              0          0   \n",
       "9555              0              0              1              0          0   \n",
       "9556              0              0              1              0          0   \n",
       "\n",
       "      elimbasu2  elimbasu3  elimbasu4  elimbasu5  elimbasu6  epared1  epared2  \\\n",
       "0             0          0          0          0          0        0        1   \n",
       "1             0          0          0          0          0        0        1   \n",
       "2             0          0          0          0          0        0        1   \n",
       "3             0          0          0          0          0        0        0   \n",
       "4             0          0          0          0          0        0        0   \n",
       "...         ...        ...        ...        ...        ...      ...      ...   \n",
       "9552          1          0          0          0          0        0        1   \n",
       "9553          1          0          0          0          0        0        1   \n",
       "9554          1          0          0          0          0        0        1   \n",
       "9555          1          0          0          0          0        0        1   \n",
       "9556          1          0          0          0          0        0        1   \n",
       "\n",
       "      epared3  etecho1  etecho2  etecho3  eviv1  eviv2  eviv3  dis  male  \\\n",
       "0           0        1        0        0      1      0      0    0     1   \n",
       "1           0        0        1        0      0      1      0    0     1   \n",
       "2           0        0        0        1      0      0      1    1     0   \n",
       "3           1        0        0        1      0      0      1    0     1   \n",
       "4           1        0        0        1      0      0      1    0     0   \n",
       "...       ...      ...      ...      ...    ...    ...    ...  ...   ...   \n",
       "9552        0        0        1        0      0      1      0    0     1   \n",
       "9553        0        0        1        0      0      1      0    0     0   \n",
       "9554        0        0        1        0      0      1      0    0     0   \n",
       "9555        0        0        1        0      0      1      0    0     0   \n",
       "9556        0        0        1        0      0      1      0    0     1   \n",
       "\n",
       "      female  estadocivil1  estadocivil2  estadocivil3  estadocivil4  \\\n",
       "0          0             0             0             0             1   \n",
       "1          0             0             0             0             1   \n",
       "2          1             0             0             0             0   \n",
       "3          0             0             0             0             0   \n",
       "4          1             0             1             0             0   \n",
       "...      ...           ...           ...           ...           ...   \n",
       "9552       0             0             1             0             0   \n",
       "9553       1             1             0             0             0   \n",
       "9554       1             0             1             0             0   \n",
       "9555       1             0             0             0             0   \n",
       "9556       0             0             0             0             0   \n",
       "\n",
       "      estadocivil5  estadocivil6  estadocivil7  parentesco1  parentesco2  \\\n",
       "0                0             0             0            1            0   \n",
       "1                0             0             0            1            0   \n",
       "2                0             1             0            1            0   \n",
       "3                0             0             1            0            0   \n",
       "4                0             0             0            0            1   \n",
       "...            ...           ...           ...          ...          ...   \n",
       "9552             0             0             0            1            0   \n",
       "9553             0             0             0            0            0   \n",
       "9554             0             0             0            0            1   \n",
       "9555             0             0             1            0            0   \n",
       "9556             0             0             1            0            0   \n",
       "\n",
       "      parentesco3  parentesco4  parentesco5  parentesco6  parentesco7  \\\n",
       "0               0            0            0            0            0   \n",
       "1               0            0            0            0            0   \n",
       "2               0            0            0            0            0   \n",
       "3               1            0            0            0            0   \n",
       "4               0            0            0            0            0   \n",
       "...           ...          ...          ...          ...          ...   \n",
       "9552            0            0            0            0            0   \n",
       "9553            1            0            0            0            0   \n",
       "9554            0            0            0            0            0   \n",
       "9555            1            0            0            0            0   \n",
       "9556            1            0            0            0            0   \n",
       "\n",
       "      parentesco8  parentesco9  parentesco10  parentesco11  parentesco12  \\\n",
       "0               0            0             0             0             0   \n",
       "1               0            0             0             0             0   \n",
       "2               0            0             0             0             0   \n",
       "3               0            0             0             0             0   \n",
       "4               0            0             0             0             0   \n",
       "...           ...          ...           ...           ...           ...   \n",
       "9552            0            0             0             0             0   \n",
       "9553            0            0             0             0             0   \n",
       "9554            0            0             0             0             0   \n",
       "9555            0            0             0             0             0   \n",
       "9556            0            0             0             0             0   \n",
       "\n",
       "        idhogar  hogar_nin  hogar_adul  hogar_mayor  hogar_total dependency  \\\n",
       "0     21eb7fcc1          0           1            0            1         no   \n",
       "1     0e5d7a658          0           1            1            1          8   \n",
       "2     2c7317ea8          0           1            1            1          8   \n",
       "3     2b58d945f          2           2            0            4        yes   \n",
       "4     2b58d945f          2           2            0            4        yes   \n",
       "...         ...        ...         ...          ...          ...        ...   \n",
       "9552  d6c086aa3          1           4            0            5        .25   \n",
       "9553  d6c086aa3          1           4            0            5        .25   \n",
       "9554  d6c086aa3          1           4            0            5        .25   \n",
       "9555  d6c086aa3          1           4            0            5        .25   \n",
       "9556  d6c086aa3          1           4            0            5        .25   \n",
       "\n",
       "     edjefe edjefa  meaneduc  instlevel1  instlevel2  instlevel3  instlevel4  \\\n",
       "0        10     no  10.00000           0           0           0           1   \n",
       "1        12     no  12.00000           0           0           0           0   \n",
       "2        no     11  11.00000           0           0           0           0   \n",
       "3        11     no  11.00000           0           0           0           1   \n",
       "4        11     no  11.00000           0           0           0           0   \n",
       "...     ...    ...       ...         ...         ...         ...         ...   \n",
       "9552      9     no   8.25000           0           0           0           1   \n",
       "9553      9     no   8.25000           1           0           0           0   \n",
       "9554      9     no   8.25000           0           1           0           0   \n",
       "9555      9     no   8.25000           0           0           0           0   \n",
       "9556      9     no   8.25000           0           0           0           1   \n",
       "\n",
       "      instlevel5  instlevel6  instlevel7  instlevel8  instlevel9  bedrooms  \\\n",
       "0              0           0           0           0           0         1   \n",
       "1              0           0           0           1           0         1   \n",
       "2              1           0           0           0           0         2   \n",
       "3              0           0           0           0           0         3   \n",
       "4              1           0           0           0           0         3   \n",
       "...          ...         ...         ...         ...         ...       ...   \n",
       "9552           0           0           0           0           0         4   \n",
       "9553           0           0           0           0           0         4   \n",
       "9554           0           0           0           0           0         4   \n",
       "9555           1           0           0           0           0         4   \n",
       "9556           0           0           0           0           0         4   \n",
       "\n",
       "      overcrowding  tipovivi1  tipovivi2  tipovivi3  tipovivi4  tipovivi5  \\\n",
       "0          1.00000          0          0          1          0          0   \n",
       "1          1.00000          0          0          1          0          0   \n",
       "2          0.50000          1          0          0          0          0   \n",
       "3          1.33333          0          0          1          0          0   \n",
       "4          1.33333          0          0          1          0          0   \n",
       "...            ...        ...        ...        ...        ...        ...   \n",
       "9552       1.25000          0          0          1          0          0   \n",
       "9553       1.25000          0          0          1          0          0   \n",
       "9554       1.25000          0          0          1          0          0   \n",
       "9555       1.25000          0          0          1          0          0   \n",
       "9556       1.25000          0          0          1          0          0   \n",
       "\n",
       "      computer  television  mobilephone  qmobilephone  lugar1  lugar2  lugar3  \\\n",
       "0            0           0            1             1       1       0       0   \n",
       "1            0           0            1             1       1       0       0   \n",
       "2            0           0            0             0       1       0       0   \n",
       "3            0           0            1             3       1       0       0   \n",
       "4            0           0            1             3       1       0       0   \n",
       "...        ...         ...          ...           ...     ...     ...     ...   \n",
       "9552         0           1            1             3       0       0       0   \n",
       "9553         0           1            1             3       0       0       0   \n",
       "9554         0           1            1             3       0       0       0   \n",
       "9555         0           1            1             3       0       0       0   \n",
       "9556         0           1            1             3       0       0       0   \n",
       "\n",
       "      lugar4  lugar5  lugar6  area1  area2  age  SQBescolari  SQBage  \\\n",
       "0          0       0       0      1      0   43          100    1849   \n",
       "1          0       0       0      1      0   67          144    4489   \n",
       "2          0       0       0      1      0   92          121    8464   \n",
       "3          0       0       0      1      0   17           81     289   \n",
       "4          0       0       0      1      0   37          121    1369   \n",
       "...      ...     ...     ...    ...    ...  ...          ...     ...   \n",
       "9552       0       0       1      0      1   46           81    2116   \n",
       "9553       0       0       1      0      1    2            0       4   \n",
       "9554       0       0       1      0      1   50           25    2500   \n",
       "9555       0       0       1      0      1   26          121     676   \n",
       "9556       0       0       1      0      1   21           64     441   \n",
       "\n",
       "      SQBhogar_total  SQBedjefe  SQBhogar_nin  SQBovercrowding  SQBdependency  \\\n",
       "0                  1        100             0          1.00000        0.00000   \n",
       "1                  1        144             0          1.00000       64.00000   \n",
       "2                  1          0             0          0.25000       64.00000   \n",
       "3                 16        121             4          1.77778        1.00000   \n",
       "4                 16        121             4          1.77778        1.00000   \n",
       "...              ...        ...           ...              ...            ...   \n",
       "9552              25         81             1          1.56250        0.06250   \n",
       "9553              25         81             1          1.56250        0.06250   \n",
       "9554              25         81             1          1.56250        0.06250   \n",
       "9555              25         81             1          1.56250        0.06250   \n",
       "9556              25         81             1          1.56250        0.06250   \n",
       "\n",
       "      SQBmeaned  agesq  Target  \n",
       "0     100.00000   1849       4  \n",
       "1     144.00000   4489       4  \n",
       "2     121.00000   8464       4  \n",
       "3     121.00000    289       4  \n",
       "4     121.00000   1369       4  \n",
       "...         ...    ...     ...  \n",
       "9552   68.06250   2116       2  \n",
       "9553   68.06250      4       2  \n",
       "9554   68.06250   2500       2  \n",
       "9555   68.06250    676       2  \n",
       "9556   68.06250    441       2  \n",
       "\n",
       "[9557 rows x 143 columns]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train['初始数据']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "ea30425d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# # 性别转化\n",
    "# def sex_binning(x):\n",
    "#     if x == 'A':\n",
    "#         return 1\n",
    "#     elif x == 'B':\n",
    "#         return 2\n",
    "#     else:\n",
    "#         return np.nan\n",
    "# df_train['nature_NTRL_CUST_SEX_CD'] = df_train['nature_NTRL_CUST_SEX_CD'].apply(lambda x: sex_binning(x))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "9f7df561",
   "metadata": {},
   "outputs": [],
   "source": [
    "# # 客户等级转化\n",
    "# def rank_binning(x):\n",
    "#     if x == 'A':\n",
    "#         return 1\n",
    "#     elif x == 'B':\n",
    "#         return 2\n",
    "#     elif x == 'C':\n",
    "#         return 3\n",
    "#     elif x == 'D':\n",
    "#         return 4\n",
    "#     elif x == 'E':\n",
    "#         return 5\n",
    "#     elif x == 'F':\n",
    "#         return 6\n",
    "# df_train['nature_NTRL_RANK_CD'] = df_train['nature_NTRL_RANK_CD'].apply(lambda x: rank_binning(x))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "63c6a216",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>dependency</th>\n",
       "      <th>edjefa</th>\n",
       "      <th>edjefe</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>9557.00000</td>\n",
       "      <td>9557.00000</td>\n",
       "      <td>9557.00000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>1.14955</td>\n",
       "      <td>2.89683</td>\n",
       "      <td>5.09679</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>1.60599</td>\n",
       "      <td>4.61206</td>\n",
       "      <td>5.24651</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.00000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>0.33333</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.00000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>0.66667</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>6.00000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>1.33333</td>\n",
       "      <td>6.00000</td>\n",
       "      <td>9.00000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>8.00000</td>\n",
       "      <td>21.00000</td>\n",
       "      <td>21.00000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       dependency      edjefa      edjefe\n",
       "count  9557.00000  9557.00000  9557.00000\n",
       "mean      1.14955     2.89683     5.09679\n",
       "std       1.60599     4.61206     5.24651\n",
       "min       0.00000     0.00000     0.00000\n",
       "25%       0.33333     0.00000     0.00000\n",
       "50%       0.66667     0.00000     6.00000\n",
       "75%       1.33333     6.00000     9.00000\n",
       "max       8.00000    21.00000    21.00000"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 字典转化\n",
    "mapping = {\"yes\": 1, \"no\": 0}\n",
    "\n",
    "# 在训练集和测试集应用相同操作\n",
    "for df in [df_train['初始数据'], df_test['初始数据']]:\n",
    "    # 用正确的映射填充值\n",
    "    df['dependency'] = df['dependency'].replace(mapping).astype(np.float64)\n",
    "    df['edjefa'] = df['edjefa'].replace(mapping).astype(np.float64)\n",
    "    df['edjefe'] = df['edjefe'].replace(mapping).astype(np.float64)\n",
    "\n",
    "df_train['初始数据'][['dependency', 'edjefa', 'edjefe']].describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "6b50c6e1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0       21eb7fcc1\n",
       "1       0e5d7a658\n",
       "2       2c7317ea8\n",
       "3       2b58d945f\n",
       "4       2b58d945f\n",
       "          ...    \n",
       "9552    d6c086aa3\n",
       "9553    d6c086aa3\n",
       "9554    d6c086aa3\n",
       "9555    d6c086aa3\n",
       "9556    d6c086aa3\n",
       "Name: idhogar, Length: 9557, dtype: object"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train['初始数据']['idhogar']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "53ef1d2f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "总共有85个家庭存在家庭成员标签值不同\n"
     ]
    }
   ],
   "source": [
    "# 首先，为了找到家庭成员标签不同的家庭，我们可以根据家庭对数据进行分组，然后检查目标是否只有一个唯一的值。\n",
    "\n",
    "all_equal = df_train['初始数据'].groupby('idhogar')['Target'].apply(lambda x: x.nunique() == 1)\n",
    "\n",
    "# 目标不完全相同的家庭\n",
    "not_equal = all_equal[all_equal != True]\n",
    "print('总共有{}个家庭存在家庭成员标签值不同'.format(len(not_equal)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "d334a733",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "总共有0个家庭，家庭成员并不都有相同的目标\n"
     ]
    }
   ],
   "source": [
    "# 遍历每个家庭\n",
    "for household in not_equal.index:\n",
    "    # 找到正确的标签(适用于户主)\n",
    "    true_target = int(df_train['初始数据'][(df_train['初始数据']['idhogar'] == household) & (df_train['初始数据']['parentesco1'] == 1.0)]['Target'])\n",
    "    \n",
    "    # 为所有家庭成员设置正确的标签\n",
    "    df_train['初始数据'].loc[df_train['初始数据']['idhogar'] == household, 'Target'] = true_target\n",
    "    \n",
    "    \n",
    "# 按户分组，求出唯一值的个数\n",
    "all_equal = df_train['初始数据'].groupby('idhogar')['Target'].apply(lambda x: x.nunique() == 1)\n",
    "\n",
    "# 目标不完全相同的家庭\n",
    "not_equal = all_equal[all_equal != True]\n",
    "print('总共有{}个家庭，家庭成员并不都有相同的目标'.format(len(not_equal)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "d5a0f50c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 金额还原"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f0c1c9c2",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "6a1ee59c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 缺失值处理\n",
    "\n",
    "\n",
    "# ​​rez_esc​​：离开学校的年数\n",
    "# 对于零值的家庭，他们可能目前没有孩子上学。可以通过找出不为空值行的年龄来验证这一点。\n",
    "# 缺失值的最大年龄是17岁。对于比这个年龄大的人，也可以假设他们根本就没在上学。\n",
    "# 对于这个变量，如果个体大于19，并且他们该值缺失，或者如果他们小于7，并且该值缺失，我们可以把它设为0；对于其他人，将保留原值，并添加一个布尔标记。\n",
    "# 在 rez_esc 列中还有一个离群值。同样，由讨论可知这个变量的最大值是5。因此，任何大于5的值都应该设置为5。\n",
    "\n",
    "for data in [df_train['初始数据'] , df_test['初始数据']] :\n",
    "    # 如果年龄在19岁以上或7岁以下，并且有缺失值，则将其设置为0\n",
    "    data.loc[((data['age'] > 19) | (data['age'] < 7)) & (data['rez_esc'].isnull()), 'rez_esc'] = 0\n",
    "    # 其他人添加标志\n",
    "    data['rez_esc-missing'] = data['rez_esc'].isnull()\n",
    "\n",
    "# v18q1 为 nan 的家庭都没有平板电脑。因此，我们可以用0来填充这个缺失的值。\n",
    "\n",
    "for df in [df_train['初始数据'] , df_test['初始数据']] :\n",
    "    df['v18q1'] = df['v18q1'].fillna(0)\n",
    "\n",
    "# v2a1:每月的租金支付\n",
    "# 除了查看每月租金支付的缺失值之外，还可以查看tipovivi_的分布情况，显示房屋所有权/出租状态的列。这幅图展示了那些v2a1为nan的状态。\n",
    "\n",
    "\n",
    "for df in [df_train['初始数据'] , df_test['初始数据']] :\n",
    "    # 用0租金填写拥有房子的家庭\n",
    "    df.loc[(df['tipovivi1'] == 1), 'v2a1'] = 0\n",
    "    # 创建缺失的租金支付栏\n",
    "    df['v2a1-missing'] = df['v2a1'].isnull()\n",
    "    df['v2a1-missing'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "62804667",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 异常值处理"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "aae6525c",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d29e96f8",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "e2607d3f",
   "metadata": {},
   "outputs": [],
   "source": [
    "table_name = '初始数据'\n",
    "dir = zhx_config['预处理后dir']\n",
    "\n",
    "pickle.dump(df_train[table_name], open(dir + '/train/' + table_name + '.pkl', 'wb'))\n",
    "pickle.dump(df_test[table_name], open(dir + '/test/' + table_name + '.pkl', 'wb'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e88836e8",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
